#---------------------------------------------------------
# Are Intermediary Constraints Priced?
# Du, Hébert, Huber
# The Review of Financial Studies 2022
#---------------------------------------------------------

#---------------------------------------------------------
# Assembles all asset classes into a panel for GMM
#---------------------------------------------------------

#---------------------------------------------------------
# Set-up
#---------------------------------------------------------

if (exists('script_path')) {
  rm(list = setdiff(ls(), 'script_path'))
} else {
  args = commandArgs(trailingOnly = T)
  if (length(args) > 0) {
    script_path <- args[1]  
    rm(list = setdiff(ls(), 'script_path'))
  } else {
    rm(list = ls())
    script_path <- '~/Dropbox/ForwardArbitrage/CodeReplication/'
  } 
}

# Inputs
source(paste0(script_path, 'RCode/ArbFunc.R'))
load(paste0(script_path, 'OutputInterim/All_Rates.RData')) 
load(paste0(script_path, 'OutputInterim/Basis_and_Returns.RData')) 
basis_pc_monthly <- fread(paste0(script_path, 'OutputInterim/PCportsMonthly.csv')) #last day of each month in basis_pc

# Initialization / working solely with OIS
currency_names_single <- paste(single_table$fund_currency, single_table$invest_currency, sep = '_')
currency_single_sans_CHF <- currency_names_single[which(substr(currency_names_single, 1, 3) != 'CHF')]
currency_single_sans_CAD <- currency_names_single[which(substr(currency_names_single, 1, 3) != 'CAD')]

#---------------------------------------------------------
# Construct monthly factors and testing assets
#---------------------------------------------------------

# Returns of single currency and portfolios (factors) as of last day of month
# Retrieve single currency OIS and IBOR 1M-forward-3M returns, and OIS 1M-f-1M returns, keep only obs with complete returns (okay to miss CHF in OIS)
list_single_returns <- list()
counter <- 1
for (rate in c('ois', 'ibor')) {
  if (rate == 'ois') {
    return_names <- c('1M_fwd_1M', '1M_fwd_3M')
    currency_check <- currency_single_sans_CHF
  } else if (rate == 'ibor') {
    return_names <- '1M_fwd_3M'
    currency_check <- currency_single_sans_CAD
  }
  for (return_tenor in return_names) {
    temp <- basis_and_returns[, c('Date', paste(currency_names_single, rate, 'log_return', return_tenor, sep = '_')), with = FALSE] 
    temp <- temp[complete.cases(temp[, paste(currency_check, rate, 'log_return', return_tenor, sep = '_'), with = FALSE])]
    names(temp) <- c('Date', paste(currency_names_single, rate, 'ret', return_tenor, sep = '_'))
    temp[, paste('dd', rate, return_tenor, sep = '_') := max(Date), by = floor_date(Date, unit = 'month')]
    temp <- temp[Date <= trading_end_date]
    temp <- temp[Date == get(paste('dd', rate, return_tenor, sep = '_'))]
    temp <- temp[, Date := as.integer(paste0(year(get(paste('dd', rate, return_tenor, sep = '_'))), 
                                             sprintf('%02d', month(get(paste('dd', rate, return_tenor, sep = '_'))))))]
    list_single_returns[[counter]] <- temp
    counter <- counter + 1
  } 
}

# Retrieve portfolio OIS and IBOR portfolio returns.
list_portfolio_returns <- list()
counter <- 1
for (rate in c('ois', 'ibor')) {
  return_tenor <- '1M_fwd_3M'
  portfolio_names <- c('classic_carry', 'single_3v3', 'equal_pairs', 'top5_basis')
  for (port_name in portfolio_names) {
    temp <- basis_and_returns[, c('Date', paste('portfolio', rate, 'log_return', port_name, return_tenor, sep = '_')), with = FALSE] 
    temp <- temp[complete.cases(temp)]
    names(temp) <- c('Date', paste('port', rate, 'ret', substr(port_name, 1, regexpr('_', port_name) - 1), return_tenor, sep = '_'))
    temp[, paste('dd_port', rate, substr(port_name, 1, regexpr('_', port_name) - 1), return_tenor, sep = '_') := max(Date), by = floor_date(Date, unit = 'month')]
    temp <- temp[Date <= trading_end_date]
    temp <- temp[Date == get(paste('dd_port', rate, substr(port_name, 1, regexpr('_', port_name) - 1), return_tenor, sep = '_'))]
    temp <- temp[, Date := as.integer(paste0(year(get(paste('dd_port', rate, substr(port_name, 1, regexpr('_', port_name) - 1), return_tenor, sep = '_'))), 
                                             sprintf('%02d', month(get(paste('dd_port', rate, substr(port_name, 1, regexpr('_', port_name) - 1), return_tenor, sep = '_'))))))]
    list_portfolio_returns[[counter]] <- temp
    counter <- counter + 1
  } 
}

# Retrieve portfolio OIS and IBOR factors.
list_factor <- list()
counter <- 1
for (rate in c('ois', 'ibor')) {
  return_tenor <- '3M'
  portfolio_names <- c('classic_carry', 'single_3v3', 'equal_pairs', 'top5_basis')
  for (port_name in portfolio_names) {
    temp <- basis_and_returns[, c('Date', paste('factor', rate, 'spot', return_tenor, port_name, sep = '_')), with = FALSE] 
    temp <- temp[complete.cases(temp)]
    temp[, paste('dd_factor', rate, return_tenor, port_name, sep = '_') := max(Date), by = floor_date(Date, unit = 'month')]
    temp <- temp[Date <= trading_end_date]
    temp <- temp[Date == get(paste('dd_factor', rate, return_tenor, port_name, sep = '_'))]
    temp <- temp[, Date := as.integer(paste0(year(get(paste('dd_factor', rate, return_tenor, port_name, sep = '_'))), 
                                             sprintf('%02d', month(get(paste('dd_factor', rate, return_tenor, port_name, sep = '_'))))))]
    list_factor[[counter]] <- temp
    counter <- counter + 1
  } 
}

portfolio_monthly <- Reduce(function(x, y) {merge(x, y, all = TRUE, by = 'Date')},
                       c(list_single_returns, list_portfolio_returns, list_factor))

# Merge + lag forward arbitrage returns as they are calculated as from t to t+1, and our convention is t-1 to t;
# take stock of variable names here and lag later so that the last row doesn't get dropped
return_names <- names(portfolio_monthly)[grepl('ret', names(portfolio_monthly)) & !grepl('dd', names(portfolio_monthly))]

# Merge in USD OIS 1M / 3M
portfolio_monthly <- merge(portfolio_monthly, all_rates[, c('Date', 'USD_ois_1M', 'USD_ois_3M'), with = FALSE], 
                           by.x = 'dd_port_ois_classic_1M_fwd_3M', by.y = 'Date',  all.x = TRUE, all.y = FALSE)
portfolio_monthly[Date == 200407, USD_ois_1M := all_rates[Date == ymd('2004-07-29'), USD_ois_1M]] #1M OIS missing on 2004-07-30

#---------------------------------------------------------
# Merge in stock returns and convert from percentage points to levels
market <- fread(paste0(script_path, 'DataSkeleton/AssetClasses/F-F_Research_Data_Factors_monthly.csv'))
market[, Mkt := `Mkt-RF` + RF]
market[, Tbill_1M := shift(RF * 12, n = 1L, type = 'lead')]
portfolio_monthly <- merge(portfolio_monthly, market[, c('Date', 'Mkt', 'Tbill_1M'), with = FALSE], 
                           by = 'Date', all = TRUE)

# Lag forward arbitrage returns; lag here so as to not lose a month
portfolio_monthly[, (return_names) := lapply(.SD, function(x) shift(x, n = 1L, type = 'lag')), .SDcols = return_names]

#---------------------------------------------------------
# Merge FF25
ff <- fread(paste0(script_path, 'DataSkeleton/AssetClasses/25_Portfolios_5x5.csv'))
names(ff) <- c('Date', sapply(seq.int(1, 25, by = 1), function(x) paste('FF25', sprintf('%02d', x), sep = '_')))
portfolio_monthly <- merge(portfolio_monthly, ff, by = 'Date', all = TRUE)

# Change all returns to be in levels as opposed to percentages
percentage_names <- names(portfolio_monthly)[!(grepl('dd', names(portfolio_monthly)) | grepl('Date', names(portfolio_monthly)) | grepl('weight', names(portfolio_monthly)))]
portfolio_monthly[, (percentage_names) := lapply(.SD, function(x) x / 100), .SDcols = percentage_names]

#---------------------------------------------------------
# Merge US Government Bonds, already in levels
# As shown by mean (us_gov[, lapply(.SD, function(x) mean(x)), .SDcols = names(us_gov)[-11]]), Sylvia numbered Portfolio 2 Portfolio 10
us_gov <- fread(paste0(script_path, 'OutputInterim/Clean_US_bonds.csv'))
us_gov <- us_gov[apply(us_gov, 1, function(x) sum(is.na(x))) == 0, ]
us_gov[, dd_us_gov := Date]
us_gov[, Date := as.integer(paste0(year(dd_us_gov), sprintf('%02d', month(dd_us_gov))))]
portfolio_monthly <- merge(portfolio_monthly, us_gov, by = 'Date', all = TRUE)

#---------------------------------------------------------
# Merge Sovereign Bonds ** Note the spelling of sovereign; need to raise to non-log return
sovereign <- fread(paste0(script_path, 'OutputInterim/EMBI/clean_soveriegn_portfolios.csv'))
sovereign[, names(sovereign)[grepl('*bv', names(sovereign))] := NULL]
rx_names <- names(sovereign)[grepl('^rx', names(sovereign))]
sovereign_names <- sapply(seq.int(1, 6, by = 1), function(x) paste('Sovereign', sprintf('%02d', x), sep = '_'))

sovereign <- sovereign[complete.cases(sovereign)]
sovereign[, (rx_names) := lapply(.SD, function(x) x + rf / 12), .SDcols = rx_names]
sovereign[, (sovereign_names) := lapply(.SD, function(x) exp(x) - 1), .SDcols = rx_names]

names(sovereign)[1] <- 'dummy_date'
sovereign[, year := substr(dummy_date, 1, 4)]
sovereign[, month := substr(dummy_date, regexpr('m', dummy_date) + 1, nchar(dummy_date))]
sovereign[, Date := as.integer(paste0(year, sprintf('%02d', as.integer(month))))]
portfolio_monthly <- merge(portfolio_monthly, sovereign[, c('Date', sovereign_names), with = FALSE], by = 'Date', all = TRUE)

#---------------------------------------------------------
# Merge FX, monthly level return, not logged, already 'excess' since involves shorting
# **Note: read_excel is finicky with sheet names and so I'm indexing the sheets directly. "All currencies" = 2, "Developed currencies" = 3.
fx_all <- data.table(read_excel(paste0(script_path, 'DataSkeleton/AssetClasses/CurrencyPortfolios_05312021.xls'), sheet = 2))
fx_all[, (2 + 6):(ncol(fx_all)) := NULL]
names(fx_all) <- c('dd_fx_all', sapply(seq.int(1, 6, by = 1), function(x) paste('FX_all', sprintf('%02d', x), sep = '_')))
fx_all[, dd_fx_all := ymd(dd_fx_all)]
fx_all[, Date := as.integer(paste0(year(dd_fx_all), sprintf('%02d', month(dd_fx_all))))]
portfolio_monthly <- merge(portfolio_monthly, fx_all, by = 'Date', all = TRUE)

fx_developed <- data.table(read_excel(paste0(script_path, 'DataSkeleton/AssetClasses/CurrencyPortfolios_05312021.xls'), sheet = 3))
fx_developed[, (2 + 5):(ncol(fx_developed)) := NULL]
names(fx_developed) <- c('dd_fx_developed', sapply(seq.int(1, 5, by = 1), function(x) paste('FX_developed', sprintf('%02d', x), sep = '_')))
fx_developed[, dd_fx_developed := ymd(dd_fx_developed)]
fx_developed[, Date := as.integer(paste0(year(dd_fx_developed), sprintf('%02d', month(dd_fx_developed))))]
portfolio_monthly <- merge(portfolio_monthly, fx_developed, by = 'Date', all = TRUE)

#---------------------------------------------------------
# Merge CDS, monthly level return
cds <- fread(paste0(script_path, 'OutputInterim/Clean_cds.csv'))
cds <- cds[complete.cases(cds)]
names(cds)[1] <- 'dd_cds'
cds <- cds[, c('yyyymm', names(cds)[grepl('return', names(cds))], 'dd_cds'), with = F]
names(cds) <- c('Date', paste('CDS', seq.int(1, 7), sep = '_'), 'dd_cds')
portfolio_monthly <- merge(portfolio_monthly, cds, by = 'Date', all = TRUE)

#---------------------------------------------------------
# Merge corporate bond, compute monthly return using last observation of the month
# 5 portfolios by rating: row monthly index level till 2003-12-23, then dd level
corp_names <- sapply(seq.int(1, 10, by = 1), function(x) paste('Corp_bond', sprintf('%02d', x), sep = '_'))

corp_bond_credit <- data.table(read_excel(paste0(script_path, 'DataSkeleton/AssetClasses/bloomberg_corp_returns_06082021.xlsx'), sheet = 'value'))
corp_bond_credit[, dd_corp_bond := ymd(Dates)]
corp_bond_credit[, Date := as.integer(paste0(year(dd_corp_bond), sprintf('%02d', month(dd_corp_bond))))]
orig_corp_names <- names(corp_bond_credit)[grepl('Index', names(corp_bond_credit))]
corp_bond_credit[, (orig_corp_names) := lapply(.SD, function(x) as.numeric(x)), .SDcols = orig_corp_names]
corp_bond_credit <- corp_bond_credit[complete.cases(corp_bond_credit)]
corp_bond_credit <- corp_bond_credit[corp_bond_credit[, .I[which.max(dd_corp_bond)], .(Date)]$V1]
corp_bond_credit[, (corp_names[1:5]) := lapply(.SD, function(x) x / shift(x, n = 1, type = 'lag') - 1), .SDcols = orig_corp_names]

corp_bond_credit <- corp_bond_credit[, c('Date', 'dd_corp_bond', corp_names[1:5]), with = F]
corp_bond_credit <- corp_bond_credit[complete.cases(corp_bond_credit)]
portfolio_monthly <- merge(portfolio_monthly, corp_bond_credit, by = 'Date', all = TRUE)

#---------------------------------------------------------
# Merge commodities, compute monthly level return using month-end info
commodity <- fread(paste0(script_path, 'OutputInterim/Clean_commodities.csv'))
names(commodity) <- c('Date', paste('Commod', seq(1, 23, by = 1), sep = '_'))
portfolio_monthly <- merge(portfolio_monthly, commodity, by = 'Date', all = TRUE)

#---------------------------------------------------------
# Merge options, calculated using OptionMetrics following Constantinides et al
load(paste0(script_path, 'OutputInterim/Options/port_returns_90adj_18.RData'))
option_18_90adj <- port_returns_18
option_18_90adj[, Date := as.numeric(paste0(year(ymd(date)), sprintf('%02d', month(ymd(date)))))]
option_18_90adj[, date := NULL]
setcolorder(option_18_90adj, c('Date'))
names(option_18_90adj) <- c('Date', paste('Option_18_90adj_new', seq(1, 18, by = 1), sep = '_'))

load(paste0(script_path, 'OutputInterim/Options/port_returns_full_18.RData'))
option_18_full <- port_returns_18
option_18_full[, Date := as.numeric(paste0(year(ymd(date)), sprintf('%02d', month(ymd(date)))))]
option_18_full[, date := NULL]
setcolorder(option_18_full, c('Date'))
names(option_18_full) <- c('Date', paste('option_18_full_new', seq(1, 18, by = 1), sep = '_'))

portfolio_monthly <- Reduce(function(x, y) merge(x, y, by = 'Date', all = TRUE), list(portfolio_monthly, option_18_90adj, option_18_full))

#---------------------------------------------------------
# Merge monthly pc of basis returns
basis_pc_monthly[, yyyymm := as.numeric(paste0(substr(Date, 1, 4), sprintf('%02d', as.numeric(substr(Date, unlist(gregexpr(pattern = 'm', Date)) + 1, 999)))))]
pc_old_names <- names(basis_pc_monthly)[grepl('pc', names(basis_pc_monthly))]
pc_names <- sub('_stand_', '_', names(basis_pc_monthly)[grepl('pc', names(basis_pc_monthly))])
for (i in 1:length(pc_names)) {
  portfolio_monthly[, pc_names[i] := basis_pc_monthly[, pc_old_names[i], with = F][match(portfolio_monthly$Date, basis_pc_monthly$yyyymm)]]
  portfolio_monthly[, pc_names[i] := shift(get(pc_names[i]), type = 'lag') / 1e2] #lag after merging to have the last month
}

# Save in Stata, csv, RData
setcolorder(portfolio_monthly, c('Date', names(portfolio_monthly)[grepl('dd', names(portfolio_monthly))],
                                 'Mkt', 'Tbill_1M', 'USD_ois_1M', 'USD_ois_3M'))
fwrite(portfolio_monthly, paste0(script_path, 'OutputInterim/Factors_And_Test_Assets_Monthly.csv'))
save(portfolio_monthly, file = paste0(script_path, 'OutputInterim/Factors_And_Test_Assets_Monthly.RData'))

names(portfolio_monthly)[grepl('factor', names(portfolio_monthly))] <-
  sub('factor', 'f', names(portfolio_monthly)[grepl('factor', names(portfolio_monthly))])
write_dta(portfolio_monthly, paste0(script_path, 'OutputInterim/Factors_And_Test_Assets_Monthly.dta'))
